#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

import multiprocessing
import os


def TestBeautifulSoup(filename):
    """Write selected PMID values from an XML file to ``<filename>.ids``.

    The file is parsed with BeautifulSoup's "xml" parser. Every ``PMID``
    element whose direct parent is ``MedlineCitation`` or ``BookDocument``
    contributes one line (its text) to the output file; ``PMID`` elements
    under any other parent are ignored.

    Args:
        filename: Path of the XML file to read. The output path is this
            path with ".ids" appended; an existing file there is overwritten.
    """
    # Imported here, as in the original, so the module itself can be loaded
    # without bs4 being importable.
    from bs4 import BeautifulSoup

    wanted_parents = ("MedlineCitation", "BookDocument")

    # Binary mode hands the raw bytes to the parser, which then determines
    # the encoding from the document itself instead of relying on the
    # platform's default text encoding. The `with` block guarantees the
    # handle is closed even if parsing raises.
    with open(filename, "rb") as infile:
        soup = BeautifulSoup(infile, "xml")

    # The output file is only created once parsing has succeeded, so a
    # malformed input does not leave an empty ".ids" file behind.
    with open(filename + ".ids", "w") as outfile:
        for pmid in soup.find_all("PMID"):
            if pmid.parent.name not in wanted_parents:
                continue
            # Tag.string is None when the element is empty or has more than
            # one child; concatenating it with "\n" would raise TypeError.
            if pmid.string is None:
                continue
            outfile.write(pmid.string + "\n")

if __name__ == "__main__":
    # Local import: only the script entry point needs sys.
    import sys

    # Fan the work out over a fixed pool of 12 worker processes.
    pool = multiprocessing.Pool(processes=12)

    # Queue one task per "*.xml" file in the current working directory.
    # The AsyncResult handles are kept because apply_async() otherwise
    # discards any exception raised inside a worker.
    pending = [
        (xml_name, pool.apply_async(TestBeautifulSoup, (xml_name,)))
        for xml_name in os.listdir(".")
        if xml_name.endswith(".xml")
    ]

    # No more tasks will be submitted; wait for all queued ones to finish.
    pool.close()
    pool.join()

    # Surface worker failures: get() re-raises the worker's exception here.
    # Every file is still attempted; failures are reported afterwards and
    # reflected in the exit status.
    failures = 0
    for xml_name, result in pending:
        try:
            result.get()
        except Exception as exc:
            failures += 1
            sys.stderr.write("%s: %s: %s\n" % (xml_name, type(exc).__name__, exc))
    if failures:
        sys.exit(1)